#This file assess the robustness by using alternative estimation strategies/kernels 

rm(list=ls(all=TRUE))
graphics.off()
library(rdd)
library(xtable)
library(ri)
library(dplyr)
setwd() # set working directory
load("data/data_alt_boot.rdata")
#drop all from closed and split lists 
data_alt <- data_alt[which(data_alt$marg.los!=0 & data_alt$marg.win != 1 & 
                             data_alt$openlist == 1 & abs(data_alt$margsim) < 1
                           & data_alt$splitlist == 0 &
                             data_alt$simscore > 0 & data_alt$simscore < 1),]

#if cases are extremely close they might have wrong signed simscore. Recode these to arbitrarily small simscore
data_alt$margsim[data_alt$margsim <= 0 & data_alt$elected == 1] <-  0.001
data_alt$margsim[data_alt$margsim >= 0 & data_alt$elected == 0] <- -0.001

data_alt <- na.omit(data.frame(data_alt[c("margsim", "electedt1", "rerun", "elected",
                                          "candpart", "year", "muncpr", "candlino")]))

#find Imbens-Kalyanaraman bandwidth
IK.bw <- IKbandwidth(data_alt$margsim, data_alt$electedt1)
IK.bw2 <- IKbandwidth(data_alt$margsim, data_alt$rerun)

#estimate baseline model
rdest    <- RDestimate(electedt1 ~ margsim, data = data_alt)
rdest1   <- RDestimate(rerun     ~ margsim, data = data_alt)

## Use alternative specifications

## Alternative kernels
IK.bw.epa      <- IKbandwidth(data_alt$margsim, data_alt$electedt1, kernel = "epanechnikov")
IK.bw2.epa     <- IKbandwidth(data_alt$margsim, data_alt$rerun,     kernel = "epanechnikov")

IK.bw.gau      <- IKbandwidth(data_alt$margsim, data_alt$electedt1, kernel = "gaussian")
IK.bw3.gau     <- IKbandwidth(data_alt$margsim, data_alt$rerun,     kernel = "gaussian")

rdest.epa      <- RDestimate(electedt1 ~ margsim, data = data_alt, kernel = "epanechnikov")
rdest1.epa     <- RDestimate(rerun     ~ margsim, data = data_alt, kernel = "epanechnikov")

rdest.gau      <- RDestimate(electedt1 ~ margsim, data = data_alt, kernel = "gaussian")
rdest1.gau     <- RDestimate(rerun     ~ margsim, data = data_alt, kernel = "gaussian")

# run fully parametric specifications on all data

data_alt <-
  data_alt %>%
  mutate(margsim2 = margsim  * margsim,
         margsim3 = margsim2 * margsim,
         margsim4 = margsim3 * margsim)

lm1   <- summary(lm(electedt1 ~ margsim*elected, data_alt))
lm1.1 <- summary(lm(rerun     ~ margsim*elected, data_alt))

lm2   <- summary(lm(electedt1 ~ margsim*elected + margsim2*elected, data_alt))
lm2.1 <- summary(lm(rerun     ~ margsim*elected + margsim2*elected, data_alt))

lm3   <- summary(lm(electedt1 ~ margsim*elected + margsim2*elected + 
                      margsim3*elected, data_alt))
lm3.1 <- summary(lm(rerun     ~ margsim*elected + margsim2*elected + 
                      margsim3*elected, data_alt))

lm4   <- summary(lm(electedt1 ~ margsim*elected + margsim2*elected + 
                      margsim3*elected + margsim4*elected, data_alt))
lm4.1 <- summary(lm(rerun     ~ margsim*elected + margsim2*elected + 
                      margsim3*elected + margsim4*elected, data_alt))

#Create Probability Weights

data_sub1 <- within(subset(data_alt, abs(margsim) < IK.bw), {
  B <- paste(year, muncpr, candpart, sep = "")
})

data_sub1 <- 
  data_sub1 %>%
  group_by(B) %>%
  mutate(size = n()) %>% 
  group_by(elected, B) %>%
  mutate(condition = n()) 

data_sub1 <- within(data_sub1, {
  p     <- condition/size
  tri_weight <- 1-margsim/IK.bw
})

data_sub2 <- within(subset(data_alt, abs(margsim) < IK.bw2), {
  B <- paste(year, muncpr, candpart, sep = "")
})

data_sub2 <- 
  data_sub2 %>%
  group_by(B) %>%
  mutate(size = n()) %>% 
  group_by(elected, B) %>%
  mutate(condition = n()) 

data_sub2 <- within(data_sub2, {
  p     <- condition/size
  tri_weight <- 1-margsim/IK.bw2
})

# Parametric With IPWs
ik.lm1.ipw  <- summary(lm(electedt1 ~ margsim*elected, 
                          data = data_sub1,
                          weights = 1/p))

ik.lm1.ipw.1<- summary(lm(rerun ~ margsim*elected, 
                          data = data_sub2,
                          weights = 1/p))

# Parametric With IPWs and Triangular Weight on bandwidth

ik.lm1.ipw.tw  <- summary(lm(electedt1 ~ margsim*elected, 
                             data = data_sub1,
                             weights = 1/p * tri_weight))

ik.lm1.ipw.tw.1  <- summary(lm(rerun ~ margsim*elected, 
                               data = data_sub2,
                               weights = 1/p * tri_weight))

## Randomization inference based method

## Choose bandwidth

## Find starting point
with(subset(data_alt, abs(margsim) < 0.01), table(margsim>0))

## Iterate over bandwidths
set.seed(191115) # first day of running program 
iter <- 5000
bws <- seq(1:100)/100
for (i in 5:50){
  ri_data <- 
    data_alt %>%
    filter(abs(margsim) < bws[i]) %>%
    mutate(Z = margsim > 0)
  ri_data <- within(ri_data, {
    B <- paste(year, muncpr, candpart, sep = "")
    probs <- genprobexact(Z = Z, blockvar = B)  
  })
  ri_data <- subset(ri_data, probs != 0 & probs != 1)
  p     <- 
    with(ri_data,{
      perms <- genperms(Z = Z, blockvar = B, maxiter = 2*iter/5)
      ate   <- estate(candlino, Z = Z, prob = probs)
      Ys    <- genouts(Y = candlino, Z =Z)
      distout <- gendist(Ys, perms = perms, prob = probs)
      dispdist(distout, ate = ate)$lesser.p.value
    })
  if(p < 0.15) stop(paste("P-value < 0.1 at bandwidth = ", bws[i], sep =""))
}

i <- i - 1
ri_data <- 
  data_alt %>%
  filter(abs(margsim) < bws[i]) %>%
  mutate(Z = margsim > 0)
ri_data <- within(ri_data, {
  B <- paste(year, muncpr, candpart, sep = "")
  probs <- genprobexact(Z = Z, blockvar = B)  
  ri_data <- subset(ri_data, probs != 0)
})
dispdist_elected    <- 
  with(ri_data,{
    perms <- genperms(Z = Z, blockvar = B, maxiter = iter)
    ate   <- estate(electedt1, Z = Z, prob = probs)
    Ys    <- genouts(Y = electedt1, Z =Z, ate = ate)
    distout <- gendist(Ys, perms = perms, prob = probs)
    dispdist <- list(dispdist(distout, ate = ate),
                     estate(electedt1, Z = Z, prob = probs))
  })

dispdist_rerun    <- 
  with(ri_data,{
    perms <- genperms(Z = Z, blockvar = B, maxiter = iter)
    ate   <- estate(rerun, Z = Z, prob = probs)
    Ys    <- genouts(Y = rerun, Z =Z, ate = ate)
    distout <- gendist(Ys, perms = perms, prob = probs)
    dispdist <- list(dispdist(distout, ate = ate),
                     estate(rerun, Z = Z, prob = probs))
  })

#save estimates and standard errors in vector
robests <- c(rdest$est[1],
             rdest.epa$est[1],
             rdest.gau$est[1],
             lm1$coefficients[3,1],
             lm2$coefficients[3,1],
             lm3$coefficients[3,1],
             lm4$coefficients[3,1],
             dispdist_elected[[2]],
             
             rdest1$est[1],
             rdest1.epa$est[1],
             rdest1.gau$est[1],
             lm1.1$coefficients[3,1],
             lm2.1$coefficients[3,1],
             lm3.1$coefficients[3,1],
             lm4.1$coefficients[3,1],
             dispdist_rerun[[2]])

robses  <- c(rdest$se[1],
             rdest.epa$se[1],
             rdest.gau$se[1],
             lm1$coefficients[3,2],
             lm2$coefficients[3,2],
             lm3$coefficients[3,2],
             lm4$coefficients[3,2],
             
             rdest1$se[1],
             rdest1.epa$se[1],
             rdest1.gau$se[1],
             lm1.1$coefficients[3,2],
             lm2.1$coefficients[3,2],
             lm3.1$coefficients[3,2],
             lm4.1$coefficients[3,2])

#create plot vector
graphics.off()
quartz(w=8,h = 5)
par(mar=c(4,14,2,1),
    las = 1)
ylabs <- rev(c( "Benchmark estimate",
                "Epanechnikov kernel", 
                "Gaussian kernel",
                "1st order polynomial w/ all data",
                "2nd order polynomial w/ all data",
                "3rd order polynomial w/ all data",
                "4th order polynomial w/ all data",
                "Randomization Inference"))

plot(robests[1:8], 8:1, 
     xlim = c(-0.5,2.3),
     ylim = c(0.75,9),
     pch = 19,
     axes = FALSE,
     ylab = "",
     xlab = "Incumbency advantage")
axis(1, 
     at = c(-0.5,0,0.5))
axis(1, 
     at = c(-0.5,0,0.5)+1.7, 
     labels = c(-0.5,0,0.5))
axis(2,
     at = 1:8,
     tick = FALSE,
     labels = ylabs)


segments(c(robests[1:7] + qnorm(0.025)*robses[1:7],
           dispdist_elected[[1]]$quantile[1]), 
         8:1,
         c(robests[1:7] + qnorm(0.975)*robses[1:7],
           dispdist_elected[[1]]$quantile[2]), 
         8:1,
         lwd = 2)

points(robests[9:16]+1.7, 8:1, pch = 15)
segments(c(robests[9:15] + qnorm(0.025)*robses[8:14], 
           dispdist_rerun[[1]]$quantile[1]) + 1.7, 
         8:1,
         c(robests[9:15] + qnorm(0.975)*robses[8:14], 
           dispdist_rerun[[1]]$quantile[2]) + 1.7, 
         8:1,
         lwd = 2)

lines(c(0,0),c(-0.2,8.2),
      lty = 2,
      col = "grey50")

lines(c(1.7,1.7),c(-0.2,8.2),
      lty = 2,
      col = "grey50")

legend("topright", 
       c("Effect on p(rerunning and elected)", "Effect on p(rerunning)"), 
       bty = "n",
       pch = c(19,15),
       lty = 1)

## Put estimates in table for SI
si_table <- cbind(round(robests[1: 8],3),
                  paste("(", round(c(robses[1: 7],
                                     dispdist_elected[[1]]$sd),3), ")", sep=""),
                  round(robests[9:16],3),
                  paste("(", round(c(robses[8:14],
                                     dispdist_rerun[[1]]$sd),3), ")", sep=""))

colnames(si_table) <- c("Pr(rerun and elected)"," ", "Pr(rerun", "  ")
rownames(si_table) <- rev(ylabs)

xtable(si_table)
